require("dplyr")

## Set working directory
## to Dataverse folder

# Read the raw survey extract; the first line of the file supplies the
# column names.
survey <- read.table(
  file = "Harris_Data/Harris 2004 Public Opinion Survey, study no. 22555/harris_s22555_spss.tab",
  header = TRUE
)

# pid: respondent identifier, kept as text rather than as a number
survey[["pid"]] <- as.character(survey$ID)
class(survey[["pid"]])
table(survey[["pid"]])

# study: constant study number, stored as a string
survey[["study"]] <- "22555"
class(survey[["study"]])
table(survey[["study"]])

# year: constant study year
survey[["year"]] <- 2004
class(survey[["year"]])
table(survey[["year"]])

# geographic data (urban)
# Inspect the raw codes, then map them to labels. The code -> label pairs
# are held in a named vector and spliced into recode() with `!!!`.
table(survey$Q17)
survey[["urban"]] <- dplyr::recode(
  survey$Q17,
  !!!c(
    "1" = "Urban",
    "2" = "Suburban",
    "3" = "Rural"
  )
)
class(survey[["urban"]])
table(survey[["urban"]])

# geographic data (region)
table(survey$Q104)
survey[["region"]] <- dplyr::recode(
  survey$Q104,
  !!!c(
    "1" = "East",
    "2" = "South",
    "3" = "Midwest",
    "4" = "West"
  )
)
class(survey[["region"]])
table(survey[["region"]])

# respondent head of household (hh)
# Placeholder column: filled with NA in this script.
survey[["hh"]] <- NA

# increasing inequality (inequality)
# The raw codes are converted to text before recoding, and the labelled
# result is stored as a factor.
table(survey$Q506A2)
survey[["inequality"]] <- as.factor(
  dplyr::recode(
    as.character(survey$Q506A2),
    !!!c(
      "1" = "Feel",
      "2" = "Don't Feel",
      "-8" = "Not Sure",
      "-9" = "Refused"
    )
  )
)

# inequality variable (inequality.variable)
# Constant tag identifying which inequality item this study uses.
survey[["inequality.variable"]] <- 3
class(survey[["inequality.variable"]])
table(survey[["inequality.variable"]])

# Placeholder columns, all filled with NA in this script:
#   union.self / union.other        - union membership
#   employed / employed.self        - employment
#   occupation / occupation.self    - occupation
# They are created in one assignment, in the same order as before.
survey[c(
  "union.self",
  "union.other",
  "employed",
  "employed.self",
  "occupation",
  "occupation.self"
)] <- NA

# household size (hhsize)
# Three household-member counts are read (Q204, Q359, Q360). Each is coerced
# to numeric and any negative value is blanked to NA before summing.
survey[["hhsize_over18"]] <- as.numeric(survey$Q204)
survey[["hhsize_over18"]] <- replace(
  survey[["hhsize_over18"]],
  survey[["hhsize_over18"]] < 0,
  NA
)

survey[["hhsize_1317"]] <- as.numeric(survey$Q359)
survey[["hhsize_1317"]] <- replace(
  survey[["hhsize_1317"]],
  survey[["hhsize_1317"]] < 0,
  NA
)

survey[["hhsize_less13"]] <- as.numeric(survey$Q360)
survey[["hhsize_less13"]] <- replace(
  survey[["hhsize_less13"]],
  survey[["hhsize_less13"]] < 0,
  NA
)

# Row-wise total across the three counts; missing components are skipped.
survey[["hhsize"]] <- rowSums(
  survey[c("hhsize_over18", "hhsize_1317", "hhsize_less13")],
  na.rm = TRUE
)

# A total of zero (including rows where every component was missing) is
# stored as NA.
is.na(survey[["hhsize"]]) <- which(survey[["hhsize"]] == 0)
table(survey[["hhsize"]])

# education (educ)
# Show the raw code distribution, label the codes, then show the result.
table(survey$Q216)
survey[["educ"]] <- dplyr::recode(
  survey$Q216,
  !!!c(
    "1" = "Less than high school",
    "2" = "Completed some high school",
    "3" = "High school graduate",
    "4" = "Some college",
    "5" = "Associates degree",
    "6" = "Bachelors degree",
    "7" = "Some graduate school",
    "8" = "Completed post graduate",
    "-8" = "Not sure",
    "-9" = "Refused"
  )
)
table(survey[["educ"]])

# household income (income)
# Income is asked in two stages: Q231 splits respondents at $50,000, then
# Q233 (under $50,000) or Q235 ($50,000 or more) records the detailed
# bracket. The final `income` column holds the detailed bracket where the
# respondent reached a follow-up, and the Q231 "Not sure" / "Decline to
# answer" label otherwise.
table(survey$Q231)
table(survey$Q233)
table(survey$Q235)

# individuals who answered "1) less than $50,000" to Q231
length(survey$Q233[survey$Q233 > -10])
survey$under50k <- dplyr::recode(survey$Q233,
                          `1` = "$14,999 or less",
                          `2` = "$15,000 to $24,999",
                          `3` = "$25,000 to $34,999",
                          `4` = "$35,000 to $49,999",
                          `-8` = "Not sure",
                          `-9` = "Decline to answer",
                          `-99.99` = "did not answer")

# individuals who answered "2) $50,000 or more" to Q231
length(survey$Q235[survey$Q235 > -10])
survey$over50k <- dplyr::recode(survey$Q235,
                         `5` = "$50,000 to $74,999",
                         `6` = "$75,000 to $99,999",
                         `7` = "$100,000 to $124,999",
                         `8` = "$125,000 to $149,999",
                         `9` = "$150,000 to $199,999",
                         `10` = "$200,000 to $249,999",
                         `11` = "$250,000 or more",
                         `-8` = "Not sure",
                         `-9` = "Decline to answer",
                         `-99.99` = "did not answer")
table(survey$over50k)

# Stage-one answer; "1" and "2" are temporary markers replaced just below.
survey$income <- dplyr::recode(survey$Q231,
                        `1` = "1",
                        `2` = "2",
                        `-8` = "Not sure",
                        `-9` = "Decline to answer")

# Replace each marker with the follow-up answer from the SAME row.
# The earlier version copied the filtered follow-up vector into the masked
# rows by position. That only lines up when the two filters select exactly
# the same respondents, and it fails when either column contains NA.
# `%in%` never yields NA, so rows whose stage-one answer is NA simply stay NA.
# A marked row whose follow-up is coded -99.99 keeps the explicit
# "did not answer" label rather than shifting every later value by one row.
survey$income <- ifelse(
  survey$income %in% "1",
  survey$under50k,
  ifelse(survey$income %in% "2", survey$over50k, survey$income)
)

table(survey$income)
class(survey$income)


# age
# Age is kept as text; the "-9" code is turned into a missing value.
table(survey$Q1030)
survey[["age"]] <- as.character(survey$Q1030)
is.na(survey[["age"]]) <- which(survey[["age"]] == "-9")
table(survey[["age"]])
class(survey[["age"]])

# race
# race1 labels the race item (Q238) and race2 the Hispanic-origin item
# (Q236); the two are then combined into a single `race` column.
table(survey$Q238)
survey[["race1"]] <- dplyr::recode(
  survey$Q238,
  !!!c(
    "1" = "White",
    "2" = "Black",
    "3" = "African American",
    "4" = "Asian or Pacific Islander",
    "5" = "Native American or Alaskan native",
    "6" = "Some other race",
    "-8" = "Not sure",
    "-9" = "Decline to answer"
  )
)
survey[["race2"]] <- dplyr::recode(
  survey$Q236,
  !!!c(
    "-9" = "Decline/not sure",
    "-8" = "Decline/not sure",
    "1" = "Yes, hispanic",
    "2" = "No, not hispanic"
  )
)
table(survey[["race1"]])
table(survey[["race2"]])

# Combine the two items, building from the innermost rule outwards.
# Step 1: classification that applies to White respondents, driven by the
#         Hispanic-origin answer.
survey[["race"]] <- ifelse(
  survey[["race2"]] == "Decline/not sure",
  "Decline/not sure",
  ifelse(
    survey[["race2"]] == "No, not hispanic",
    "Non-Hispanic White",
    "Hispanic White"
  )
)
# Step 2: everyone whose race1 is not "White" becomes "Non-white".
survey[["race"]] <- ifelse(
  survey[["race1"]] == "White",
  survey[["race"]],
  "Non-white"
)
# Step 3: a declined or unsure race1 answer overrides the steps above.
survey[["race"]] <- ifelse(
  survey[["race1"]] == "Decline to answer" | survey[["race1"]] == "Not sure",
  "Decline/not sure",
  survey[["race"]]
)
table(survey[["race"]])
table(survey[["race"]][survey[["race1"]] == "White"])

# politics (party)
table(survey$Q220)
survey[["party"]] <- dplyr::recode(
  survey$Q220,
  !!!c(
    "1" = "Republican",
    "2" = "Democrat",
    "3" = "Independent",
    "4" = "Other",
    "-8" = "Not sure",
    "-9" = "Decline to answer"
  )
)
table(survey[["party"]])
class(survey[["party"]])

# politics (ideology)
table(survey$Q226)
survey[["ideology"]] <- dplyr::recode(
  survey$Q226,
  !!!c(
    "1" = "Conservative",
    "2" = "Moderate",
    "3" = "Liberal",
    "-8" = "Not sure",
    "-9" = "Decline to answer"
  )
)
table(survey[["ideology"]])
class(survey[["ideology"]])

# gender
table(survey$Q1020)
survey[["gender"]] <- dplyr::recode(
  survey$Q1020,
  !!!c(
    "1" = "Male",
    "2" = "Female"
  )
)
table(survey[["gender"]])
class(survey[["gender"]])

# religion and factual items
# Placeholder columns, all filled with NA in this script.
survey[c("religion", "factual1", "factual2", "factual3")] <- NA

## alienation index
# Q506A1, Q506A3 and Q506A4 share one set of response labels, so the same
# recode is applied to each and the results are stored as dontcare,
# dontcount and leftout respectively.
survey[c("dontcare", "dontcount", "leftout")] <- lapply(
  list(survey$Q506A1, survey$Q506A3, survey$Q506A4),
  dplyr::recode,
  `1` = "Feel",
  `2` = "Don't Feel",
  `-8` = "Not Sure",
  `-9` = "Refused"
)

## question_place
# Constant tag recording where the inequality question sits in this survey.
survey[["question_place"]] <- "before party"

# subset
# Keep only the harmonised columns, in this fixed order.
survey_22555 <- survey[c(
  "pid",
  "study",
  "year",
  "urban",
  "region",
  "hh",
  "inequality",
  "inequality.variable",
  "union.self",
  "union.other",
  "employed",
  "employed.self",
  "occupation",
  "occupation.self",
  "hhsize",
  "educ",
  "income",
  "age",
  "race",
  "party",
  "ideology",
  "gender",
  "religion",
  "factual1",
  "factual2",
  "factual3",
  "dontcare",
  "dontcount",
  "leftout",
  "question_place"
)]

# save file (left disabled)
#saveRDS(survey_22555, file = "Harris_Data/survey_22555.rds")
